###################################################################################################
###################################       loading library     #####################################
###################################################################################################
library(gplots)
library(amap)
# Look up the family annotation for a set of repeat names.
#
# Args:
#   X: vector of repeat names to look up.
#   repeats_class_family: data frame whose first column is compared against
#       each name and whose third column holds the value to return.
#
# Returns:
#   A vector with the third-column values of every matching row, concatenated
#   in the order of X. A name with several matching rows contributes several
#   values; a name with no match contributes nothing, so the result is not
#   guaranteed to be aligned with X. NULL when X is empty.
repeatsToFamily <- function(X,repeats_class_family){
    # A for-loop over a factor yields its labels; keep that behaviour here.
    if (is.factor(X)) X <- as.character(X)
    keys <- repeats_class_family[[1]]
    values <- repeats_class_family[[3]]
    # Return plain values rather than factor codes/levels.
    if (is.factor(values)) values <- as.character(values)
    # Collect the hits per name and flatten once instead of growing a vector
    # with c() on every iteration.
    hits <- lapply(X, function(each) values[which(keys == each)])
    unlist(hits, use.names = FALSE)
}

# Look up the class annotation for a set of repeat names.
#
# Args:
#   X: vector of repeat names to look up.
#   repeats_class_family: data frame whose first column is compared against
#       each name and whose second column holds the value to return.
#
# Returns:
#   A vector with the second-column values of every matching row, concatenated
#   in the order of X. A name with several matching rows contributes several
#   values; a name with no match contributes nothing, so the result is not
#   guaranteed to be aligned with X. NULL when X is empty.
repeatsToClass <- function(X,repeats_class_family){
    # A for-loop over a factor yields its labels; keep that behaviour here.
    if (is.factor(X)) X <- as.character(X)
    keys <- repeats_class_family[[1]]
    values <- repeats_class_family[[2]]
    # Return plain values rather than factor codes/levels.
    if (is.factor(values)) values <- as.character(values)
    # Collect the hits per name and flatten once instead of growing a vector
    # with c() on every iteration.
    hits <- lapply(X, function(each) values[which(keys == each)])
    unlist(hits, use.names = FALSE)
}

# Score how specifically one gene/repeat is expressed at a given time point.
#
# Args:
#   x: named numeric vector of expression values, one per time point.
#   given_time: name of the time point of interest (must be one of names(x)
#       for a score to be returned).
#   mean_cutoff: minimum mean expression across all time points.
#
# Returns:
#   x[given_time] / mean(x) (a named length-one numeric) when the mean reaches
#   mean_cutoff AND given_time is the time point with the highest value
#   (the first one in case of ties); NA otherwise.
SpecificGene <- function(x,given_time,mean_cutoff){
    avg <- mean(x)
    # Name of the single highest-expressed time point.
    top_time <- names(x)[order(x, decreasing = TRUE)[1L]]
    # Scalar condition: use && (not elementwise &) and TRUE (not the
    # reassignable T).
    if (avg >= mean_cutoff && given_time %in% top_time){
        return (x[given_time]/avg)
    }
    NA
}

# Draw a pie chart whose labels sit at the end of leader lines.
#
# The structure closely follows graphics::pie() (it appears to be adapted from
# it); the visible differences are the two extra arguments `ll` and `llty`
# that control the leader lines.
#
# Args:
#   x: non-negative numeric vector of slice sizes (normalised internally).
#   labels: slice labels; defaults to names(x), or 1..n when those are NULL.
#       NA or empty labels suppress both the leader line and the text.
#   edges: number of polygon edges used to approximate the full circle.
#   radius: pie radius in plot units (the plot spans at least [-1, 1]).
#   clockwise: draw slices clockwise instead of counter-clockwise.
#   init.angle: starting angle in degrees.
#   density, angle: shading-line density and angle, passed to polygon().
#   ll: leader-line length; the line runs from 1.1 * radius to
#       (1 + ll) * radius and the label is placed at (1 + 1.2 * ll) * radius.
#   llty: line type of the leader lines.
#   col, border, lty: per-slice fill colour, border colour and line type
#       (recycled to the number of slices).
#   main: plot title.
#   ...: further arguments forwarded to both text() and title().
#
# Returns:
#   NULL, invisibly. Called for its side effect of drawing on the current
#   graphics device.
pie2 <- function (x, labels = names(x), edges = 200, radius = 0.8, clockwise = FALSE, 
    init.angle = if (clockwise) 90 else 0, density = NULL, angle = 45, ll=0.35,llty=1,
    col = NULL, border = NULL, lty = NULL, main = NULL, ...) 
{
    if (!is.numeric(x) || any(is.na(x) | x < 0)) 
        stop("'x' values must be positive.")
    if (is.null(labels)) 
        labels <- as.character(seq_along(x))
    else labels <- as.graphicsAnnot(labels)
    # Cumulative fractions: slice i spans x[i] .. x[i + 1] of the full circle.
    x <- c(0, cumsum(x)/sum(x))
    dx <- diff(x)
    nx <- length(dx)
    plot.new()
    # Stretch the longer axis so the pie stays circular on non-square devices.
    pin <- par("pin")
    xlim <- ylim <- c(-1, 1)
    if (pin[1L] > pin[2L]) 
        xlim <- (pin[1L]/pin[2L]) * xlim
    else ylim <- (pin[2L]/pin[1L]) * ylim
    dev.hold()
    on.exit(dev.flush())
    plot.window(xlim, ylim, "", asp = 1)
    if (is.null(col)) 
        col <- if (is.null(density)) 
            c("white", "lightblue", "mistyrose", "lightcyan", 
                "lavender", "cornsilk")
        else par("fg")
    if (!is.null(col)) 
        col <- rep_len(col, nx)
    if (!is.null(border)) 
        border <- rep_len(border, nx)
    if (!is.null(lty)) 
        lty <- rep_len(lty, nx)
    angle <- rep(angle, nx)
    if (!is.null(density)) 
        density <- rep_len(density, nx)
    twopi <- if (clockwise) 
        -2 * pi
    else 2 * pi
    # Map a fraction of the circle to x/y coordinates on the pie's rim.
    t2xy <- function(t) {
        t2p <- twopi * t + init.angle * pi/180
        list(x = radius * cos(t2p), y = radius * sin(t2p))
    }
    # seq_len() rather than 1L:nx so that an empty `x` draws nothing instead
    # of iterating over c(1, 0) and failing inside seq.int().
    for (i in seq_len(nx)) {
        n <- max(2, floor(edges * dx[i]))
        P <- t2xy(seq.int(x[i], x[i + 1], length.out = n))
        polygon(c(P$x, 0), c(P$y, 0), density = density[i], angle = angle[i], 
            border = border[i], col = col[i], lty = lty[i])
        # Direction of the slice's mid-angle, used to place line and label.
        P <- t2xy(mean(x[i + 0:1]))
        lab <- as.character(labels[i])
        if (!is.na(lab) && nzchar(lab)) {
            lines(c(1.1, 1+ll) * P$x, c(1.1, 1+ll) * P$y,lty=llty)
            # Right-align labels on the left half, left-align on the right.
            text((1+ll*1.2) * P$x, (1+ll*1.2) * P$y, labels[i], xpd = TRUE, 
                adj = if (P$x < 0) 1 else 0,...)
        }
    }
    title(main = main, ...)
    invisible(NULL)
}

###################################################################################################
###################################         read data         #####################################
###################################################################################################
# Apply `stat` (e.g. mean or sd) across the samples of each group, per row.
#   fpkm:   data frame with one row per repeat and one column per sample.
#   groups: named list of column-index vectors; one output column per entry,
#           in list order, named after the list names.
summarise_sample_groups <- function(fpkm, groups, stat){
    per_group <- lapply(unname(groups), function(cols) apply(fpkm[cols], 1, stat))
    out <- do.call(cbind, per_group)
    colnames(out) <- names(groups)
    out
}

# Column indices, within the development FPKM table, of the samples that
# belong to each stage.
Oocyte <- 1:3
Zygote <- 4:6
cell2 <- 7:12
cell4 <- 13:24
cell8 <- 25:43
Morula <- 44:59
MTE <- c(63,66,68,69,71,76:79)
PTE <- c(60:62,64,65,67,70,73,81)
PE <- c(83:89)
EPI <- c(72,74:75,80,82)
hESC0 <- 90:95
hESC10 <- 96:121
develop_path <- c("Oocyte","Zygote","2cell","4cell","8cell","Morula","MTE","PTE","PE","EPI","hESC0","hESC10")
development_repeats_fpkm <- read.table("../data/develop_repeats_fpkm.txt",row.names=1,header=TRUE)
# Per-stage mean FPKM; columns follow develop_path.
developmentRepeatsAverageFpkm <- summarise_sample_groups(
    development_repeats_fpkm,
    setNames(list(Oocyte, Zygote, cell2, cell4, cell8, Morula, MTE, PTE, PE, EPI, hESC0, hESC10),
        develop_path),
    mean)
# developmentRepeatsAverageFpkm <- developmentRepeatsAverageFpkm[,develop_path]
AmplifyDevelopmentRepeatsAverageFpkm <- developmentRepeatsAverageFpkm * 1e3

# Column indices, within the naive FPKM table, of the samples per condition.
he0 <- 1:2
he2 <- 3:4
he6 <- 5:6
hiF <- 7:8
n10 <- 9
n12 <- 10:11
n14 <- c(12:13,25)
n20 <- c(14:15,26)
n24m <- 16:17
n24p <- 18:19
n8 <- c(20:21,24)
niPS <- 22:23
naive_repeats_fpkm <- read.table("../data/naive_repeats_fpkm.txt",row.names=1,header=TRUE)
# n10 (a single column) is defined above but is not part of the summary
# matrices below -- presumably because an SD cannot be computed from one
# sample; confirm before adding it.
naive_groups <- list(hiF = hiF, he0 = he0, he2 = he2, he6 = he6, n8 = n8, n12 = n12,
    n14 = n14, n20 = n20, n24m = n24m, n24p = n24p, niPS = niPS)
naiveRepeatsAverageFpkm <- summarise_sample_groups(naive_repeats_fpkm, naive_groups, mean)
naiveRepeatsSD <- summarise_sample_groups(naive_repeats_fpkm, naive_groups, sd)
AmplifyNaiveRepeatsAverageFpkm <- naiveRepeatsAverageFpkm * 1e3
AmplifyNaiveRepeatsSD <- naiveRepeatsSD * 1e3

# Repeat annotation table; downstream code uses column 1 as the repeat name,
# column 2 as its class and column 3 as its family.
repeats_class_family <- read.table("../data/hg19.repeats.class.family")

###################################################################################################
##############################        specific gene detection       ###############################
###################################################################################################

# Score every repeat for 8-cell specificity across the embryonic stages only
# (the hESC0/hESC10 columns are left out). SpecificGene returns NA for repeats
# that are not 8-cell specific or fall below the mean cutoff of 0.1.
develop_eight_cell <- apply(AmplifyDevelopmentRepeatsAverageFpkm[,c("Oocyte","Zygote","2cell","4cell","8cell","Morula","MTE","PTE","PE","EPI")],1,SpecificGene,"8cell",0.1)
# develop_eight_cell <- apply(AmplifyDevelopmentRepeatsAverageFpkm,1,SpecificGene,"8cell",0.1)
# Keep the (at most) 100 highest-scoring repeats. sort() drops the NA scores;
# head() is used instead of [1:100] so that fewer than 100 qualifying repeats
# do not pad the result with placeholder entries for the missing positions.
develop_8cell_gene <- names(head(sort(develop_eight_cell,decreasing=TRUE),100))
# write.table(develop_8cell_gene,file="8c_repeats.txt",quote=F,col.names=F,row.names=F)

###################################################################################################
###################################           plot            #####################################
###################################################################################################

# Count how often each distinct value occurs, keeping the values in order of
# first appearance (the slice order of the pies below depends on it when
# counts are tied). Returns a named integer vector.
count_in_first_seen_order <- function(values){
    groups <- unique(values)
    counts <- tabulate(match(values, groups), nbins = length(groups))
    names(counts) <- groups
    counts
}

# Write one pie chart of counts / total to a 6x6 inch PDF, slices sorted by
# increasing count. With by_name = TRUE the palette is indexed by slice label;
# otherwise it is handed to pie2() as is and recycled in slice order.
# The device is closed even if plotting fails.
save_fraction_pie <- function(file, counts, total, palette, by_name = FALSE){
    ord <- order(counts)
    slice_labels <- names(counts)[ord]
    slice_cols <- if (by_name) palette[slice_labels] else palette
    pdf(file, width = 6, height = 6)
    on.exit(dev.off())
    pie2(counts[ord] / total, col = slice_cols, labels = slice_labels,
        border = slice_cols, radius = 0.5, llty = 2, ll = 0.35)
}

# Over-representation p-value P(X >= x) for x hits among k draws from a
# population of `total` items containing m items of the category.
# phyper(x, ...) is P(X <= x), so the upper tail that includes x itself must
# be taken at x - 1; 1 - phyper(x, ...) would give P(X > x).
enrichment_pvalue <- function(x, m, total, k){
    phyper(x - 1, m, total - m, k, lower.tail = FALSE)
}

# Family and class of the 8-cell-specific repeats; every class other than
# LINE/SINE/LTR is pooled into "OTHER".
ec_family <- repeatsToFamily(develop_8cell_gene,repeats_class_family)
ec_class <- repeatsToClass(develop_8cell_gene,repeats_class_family)
ec_class[ec_class!="LINE" & ec_class!="SINE" & ec_class!="LTR"] <- "OTHER"

# Background: family and class of every repeat in the annotation table.
all_family <- as.vector(repeats_class_family[,3])
all_class <- as.vector(repeats_class_family[,2])
all_class[all_class!="LINE" & all_class!="SINE" & all_class!="LTR"] <- "OTHER"

cols80 <- c("#CE001380","#16557A80","#C7A60980","#87C23280")
names(cols80) <- c("LTR","LINE","SINE","OTHER")

# ---- class level ----
ec_count <- count_in_first_seen_order(ec_class)
family <- names(ec_count)
ec_total_counts <- length(ec_class)
save_fraction_pie("SFig3S2A_1.pdf", ec_count, ec_total_counts, cols80, by_name = TRUE)

tmp_count <- count_in_first_seen_order(all_class)
family <- names(tmp_count)
total_counts <- length(all_class)
save_fraction_pie("SFig3S2A_2.pdf", tmp_count, total_counts, cols80, by_name = TRUE)

# Are LTR elements over-represented among the 8-cell-specific repeats?
x <- ec_count["LTR"]
m <- tmp_count["LTR"]
n <- total_counts - m
k <- ec_total_counts
pval <- enrichment_pvalue(x, m, total_counts, k)
print(pval)
# Values recorded in this file before the switch to P(X >= x), i.e. with the
# former 1 - phyper(x, m, n, k):
# [1] 0.007437128
# 0.00408143 
# 2x2 table: hits in category, hits outside, background-only in category,
# background-only outside.
print(c(x,k-x,m-x,total_counts-m-(k-x)))

# ---- family level ----
cccol <- c("#CE0013","#16557A","#C7A609","#87C232","#64C0AB","#A14C94","#15A08C","#8B7E75","#1E7CAF","#EA425F","#46489A","#E50033","#0F231F","#1187CD")

ec_count <- count_in_first_seen_order(ec_family)
family <- names(ec_count)
ec_total_counts <- length(ec_family)
save_fraction_pie("SFig3S2A_2_1.pdf", ec_count, ec_total_counts, cccol)

tmp_count <- count_in_first_seen_order(all_family)
family <- names(tmp_count)
total_counts <- length(all_family)
save_fraction_pie("SFig3S2A_2_2.pdf", tmp_count, total_counts, cccol)

# Same test for the ERV families. The number of draws is the family-level
# count of 8-cell-specific repeats (ec_total_counts).
# Values recorded before the switch to P(X >= x):
#   ERV1 0.0005906905, ERVK 0.01346819, ERVL 0.7384948
for (erv_family in c("ERV1","ERVK","ERVL")){
    x <- ec_count[erv_family]
    m <- tmp_count[erv_family]
    n <- total_counts - m
    k <- ec_total_counts
    pval <- enrichment_pvalue(x, m, total_counts, k)
    print(pval)
}